/*** Trends - Empirical Application

	---- PREPARE DATASET ----

This code creates the data set(s) for estimation of the effect of compulsory school reform on IGEs. The following steps are implemented:

1. Load raw data needed for basic reform coding.
2. Implement reform coding using the material from Helena Holmlund.
3. Merge parents and children to individuals of the reform cohorts.
4. Add schooling data
5. Add father's birth year, schooling, income
6. Add mother's birth year, schooling, income
7. Incomes for children, years 1968-2007
8. Merge additional variables for fathers and mothers
9. Generate parental averages and sample dummies
10. Store files main_file.dta and main_file_income.dta.

***************************************************************************************/

* STATA settings: start with empty memory and a fresh text log for this run
clear
capture log close
log using ${path2}Logs/${ver}/school_reform_dataset, text replace

** 1. Load basic data

* Clean datafil10.dta, which contains a small number of duplicates, beforehand (once).
* With -force- the first record per lopnr is kept even if the duplicates differ elsewhere.
use "${path}datafil10.dta" , replace
duplicates drop lopnr, force
save "${path2}Dat/datafil10_edit.dta" , replace

use ${path}datafil01.dta, replace

* Get data on municipality of residence of individual in 1960, 1965, etc based on censuses (for reform cohorts only).
* datafil10_edit.dta was just made unique on lopnr, so every merge here is one-to-one.
merge 1:1 lopnr using ${path2}Dat/datafil10_edit.dta, keepusing(kommun60 kommun65) nogen
merge 1:1 lopnr using ${path}forsamlingfob60_m.dta, nogen
merge 1:1 lopnr using ${path}forsamlingfob65_m.dta, nogen

* Keep the raw census values under *_true; kommun60/forsamling60 are rebuilt below
forval i=60(5)65 {
    rename kommun`i' kommun`i'_true
    label var kommun`i'_true "Individual's municipality of residence in 19`i'"
    rename forsamling`i' forsamling`i'_true
    label var forsamling`i'_true "Individual's parish of residence in 19`i'"
}

rename fodelsear foddar
label var foddar "Birth year of individual"

* Municipality and parish of residence used for matching the reform dummy.
* Cohorts 1943-1949 take the 1960 census value, cohorts 1950-1955 the 1965 value;
* if that value is missing, the other census is used as fallback. Both fallbacks are
* restricted to cohorts 1943-1955, identically for municipality and parish.
foreach geo in kommun forsamling {
    destring `geo'60_true `geo'65_true, replace
    gen     `geo'60 = `geo'60_true if foddar<1950 & foddar>1942
    replace `geo'60 = `geo'65_true if foddar>1949 & foddar<1956
    replace `geo'60 = `geo'60_true if `geo'60==. & foddar<1956 & foddar>1942
    replace `geo'60 = `geo'65_true if `geo'60==. & foddar<1956 & foddar>1942
}
label var kommun60 "Municipality of residence for matching of reform dummy"
label var forsamling60 "Parish of residence for matching of reform dummy"

gen gender=(kon == "Kvinna")
label var gender "=1 if woman"

keep lopnr foddar kommun60 kommun60_true kommun65_true forsamling60 forsamling60_true forsamling65_true gender dodarman urval
* NOTE(review): a missing foddar also satisfies foddar>=1910 in Stata and is therefore kept
keep if foddar>=1910

** 2. Implement reform coding

* Attach the municipality-level reform information, keyed on the matching municipality kommun60
merge m:1 kommun60 using ${path2}Dat/slutgiltiga_reformkommuner_fob60.dta, nogen
* This do-file (from Helena Holmlund) creates reform dummy (called "experiment") and sets municipalities with uncertain reform status to missing.

do ${path2}Do/Reform_assignment.do

* kommun60 was only filled for cohorts 1943-1955 in section 1. Now that the reform
* assignment has run, fill it for the other cohorts as well:
* born before 1943 -> 1960 census value, falling back to the 1965 value if missing;
* born 1956-1959   -> 1965 census value.
replace kommun60=kommun60_true if foddar<1943
replace kommun60=kommun65_true if kommun60==. & foddar<1943
replace kommun60=kommun65_true if foddar>1955 & foddar<1960

* The raw census municipalities are no longer needed; the trailing underscore in
* kommun60_ keeps the name free for the parent-level renames in section 8.
drop kommun*true
ren kommun60 kommun60_

* set reform dummy to zero before 1943 and to one after 1955
replace experiment=0 if foddar<1943
* foddar!=. is required because a missing value compares as larger than any number
replace experiment=1 if foddar>1955 & foddar!=.
label var experiment "=1 if individual was likely subject to reformed school type"

** 3. Merge parents and children of individuals in master file

* Parents: ids of the parents of each individual in the master file
merge m:1 lopnr using "${path}datafil03.dta", keep(match master) nogen

* We use only biological parents, so the adoption-related variables go
drop adopdatummor adopdatumfar lopnradmor lopnradfar
label var lopnrbiofar "Id of biological father"
label var lopnrbiomor "Id of biological mother"
save ${path2}Dat/temp.dta, replace

* Children: datafil02 is long in (parent lopnr, child lopnrbarn). Attach the sex of
* the parent and reshape to one row per child holding father and mother ids.
use "${path}datafil02.dta", replace
sort lopnr
merge m:1 lopnr using ${path}datafil01.dta , keep(match) keepusing(kon) nogen
gen gender = (kon == "Kvinna")
label var gender "=1 if woman"
drop kon ordnr
reshape wide lopnr, i(lopnrbarn) j(gender)
* gender==0 -> father, gender==1 -> mother
rename lopnr0 lopnrbiofar
rename lopnr1 lopnrbiomor
rename lopnrbarn lopnr
order lopnr
sort lopnr
save ${path2}Dat/datafil02_renamed.dta , replace

* Back to the master file: flag whether each parent id is known, add the links
* found through the children file (-update- fills missing ids only), and refresh
* the flags so that they also cover updated and newly added rows.
use ${path2}Dat/temp.dta, replace
foreach who in father mother {
    local id = cond("`who'" == "father", "lopnrbiofar", "lopnrbiomor")
    gen `who'found = (`id' != .)
    label var `who'found "=1 if `who''s lopnr found"
}
merge 1:1 lopnr using ${path2}Dat/datafil02_renamed.dta, update nogen
foreach who in father mother {
    local id = cond("`who'" == "father", "lopnrbiofar", "lopnrbiomor")
    replace `who'found = (`id' != .)
}

order lopnr foddar gender lopnrbiofar lopnrbiomor
sort lopnr

** 4. Schooling (Merging and coding educational attainment from educational registers)

* 1970 census: attainment level stored as the string codes "1" to "7"
merge m:1 lopnr using ${path2}Dat/datafil10_edit.dta, keep(match master) keepusing(utbniva70) nogen
rename utbniva70 educ70

* Years of education corresponding to highest level of attainment
* (level k maps to the k-th entry of the list; any other code stays missing)
gen educyrs70 = .
local yrs70 7 9 11 12 14 16 20
forvalues lev = 1/7 {
    local y : word `lev' of `yrs70'
    replace educyrs70 = `y' if educ70 == "`lev'"
}
label var educyrs70 "Years of schooling (coded) 1970"
gen byte educyrs70found = (educyrs70!=.)
label var educyrs70found "=1 if educyrs70!=."
drop educ70

* Modern educational registers (1990-): old SUN codes for 1990-1999 ...
foreach yr in 90 93 96 99 {
    merge 1:1 lopnr using "${path}datafil11_utb19`yr'", keep(match master) nogen keepusing(hutbsun)
    gen educ`yr' = real(hutbsun)
    drop hutbsun
}
* ... and SUN2000 levels for 2003-2007. The 2003 file is not unique on lopnr, hence
* the 1:m merge followed by dropping the surplus rows.
merge 1:m lopnr using "${path}datafil11_utb2003", keep(match master) nogen keepusing(sun2000niva)
gen educ2003_sun2000 = real(sun2000niva)
duplicates drop lopnr, force
drop sun2000niva
foreach yr in 2006 2007 {
    merge 1:1 lopnr using "${path}utbild`yr'", keep(match master) nogen keepusing(sun2000niva)
    gen educ`yr'_sun2000 = real(sun2000niva)
    drop sun2000niva
}

* Use highest value found in data (1990-1999 codes)
gen sun = educ90
foreach yr in 93 96 99 {
    replace sun = educ`yr' if sun==. | (educ`yr'>sun & educ`yr'!=.)
}

* Convert old educ codes (pre year 2000) to new SUN levels; a later SUN2000
* register value overrides the converted one (2007 wins over 2006 over 2003)
merge m:1 sun using ${path2}Dat/sun2000_converter.dta, keep(match master) nogen
foreach yr in 2003 2006 2007 {
    replace sun2000niva = educ`yr'_sun2000 if educ`yr'_sun2000!=.
}
rename sun2000niva educ

* Old codes that are still unconverted at this point are assigned by hand
local oldsun   2349 24200 33208 33899 34721 34722 34723 36298 36302 36398 ///
               36498 36680 36681 36688 36899 44899 46298 46498 66888 76298
local newlevel  204   337   327   320   336   336   336   536   537   537 ///
                547   536   536   536   530   330   536   547   537   547
local npairs : word count `oldsun'
forvalues k = 1/`npairs' {
    local from : word `k' of `oldsun'
    local to   : word `k' of `newlevel'
    replace educ = `to' if sun == `from' & educ == .
}

cap drop educ90 educ93 educ96 educ99 educ2003_sun2000 educ2006_sun2000 educ2007_sun2000 sun suntext sun2000inr

* Convert to years: each band is "lower upper years" with both bounds exclusive
gen educyrs = 20 if educ==640
foreach band in "599 640 18" "549 600 17" "539 550 16" "529 540 15" "519 530 14" ///
                "399 500 13" "329 400 12" "319 330 11" "309 320 10" "199 300 9" {
    tokenize `band'
    replace educyrs = `3' if educ > `1' & educ < `2'
}
replace educyrs = 7 if educ<200
label var educyrs "Years of schooling (coded) 1990+"
gen educyrsfound = (educyrs!=.)
label var educyrsfound "=1 if educyrs!=."
drop educ
sort lopnr

save ${path2}Dat/temp2.dta, replace

** 5. Father's birth year, schooling, income

* From here on lopnr holds the father's id so that register files can be merged on
* it; the individual's own id is kept in lopnrchild.
rename lopnr lopnrchild
rename lopnrbiofar lopnr

sort lopnr
merge m:1 lopnr using "${path}datafil01.dta", keep(match master) keepusing(fodelsear) nogen
rename fodelsear f_byear
label var f_byear "Father's birth year"

* 1970 census: attainment level stored as the string codes "1" to "7"
merge m:1 lopnr using ${path2}Dat/datafil10_edit.dta, keep(match master) keepusing(utbniva70) nogen
rename utbniva70 f_educ70

* Level k maps to the k-th entry of the list; any other code stays missing
gen f_educyrs70 = .
local yrs70 7 9 11 12 14 16 20
forvalues lev = 1/7 {
    local y : word `lev' of `yrs70'
    replace f_educyrs70 = `y' if f_educ70 == "`lev'"
}
label var f_educyrs70 "Father's years of schooling (coded) 1970"
gen f_educyrs70found = (f_educyrs70!=.)
label var f_educyrs70found "=1 if f_educyrs70!=."
drop f_educ70

* 1990+ educational registers: old SUN codes for 1990-1999 ...
foreach yr in 90 93 96 99 {
    merge m:1 lopnr using "${path}datafil11_utb19`yr'", keep(match master) nogen keepusing(hutbsun)
    gen educ`yr' = real(hutbsun)
    drop hutbsun
}
* ... and SUN2000 levels for 2003-2007.
* NOTE(review): m:m is kept as in the original; it can add rows when the 2003 file
* holds several records for a father, which relies on the later de-duplication on
* the child's lopnr in section 7.
merge m:m lopnr using "${path}datafil11_utb2003", keep(match master) nogen keepusing(sun2000niva)	//The m:m is innocuous since all duplicates in using data are fully missing and will later be dropped
gen educ2003_sun2000 = real(sun2000niva)
drop sun2000niva
foreach yr in 2006 2007 {
    merge m:1 lopnr using "${path}utbild`yr'", keep(match master) nogen keepusing(sun2000niva)
    gen educ`yr'_sun2000 = real(sun2000niva)
    drop sun2000niva
}

* Use highest value found in data (1990-1999 codes)
gen sun = educ90
foreach yr in 93 96 99 {
    replace sun = educ`yr' if sun==. | (educ`yr'>sun & educ`yr'!=.)
}

* Convert old educ codes (pre year 2000) to new SUN levels; a later SUN2000
* register value overrides the converted one (2007 wins over 2006 over 2003)
merge m:1 sun using ${path2}Dat/sun2000_converter.dta, keep(match master) nogen
foreach yr in 2003 2006 2007 {
    replace sun2000niva = educ`yr'_sun2000 if educ`yr'_sun2000!=.
}
rename sun2000niva educ

* Old codes that are still unconverted at this point are assigned by hand
local oldsun   2349 24200 33208 33899 34721 34722 34723 36298 36302 36398 ///
               36498 36680 36681 36688 36899 44899 46298 46498 66888 76298
local newlevel  204   337   327   320   336   336   336   536   537   537 ///
                547   536   536   536   530   330   536   547   537   547
local npairs : word count `oldsun'
forvalues k = 1/`npairs' {
    local from : word `k' of `oldsun'
    local to   : word `k' of `newlevel'
    replace educ = `to' if sun == `from' & educ == .
}

cap drop educ90 educ93 educ96 educ99 educ2003_sun2000 educ2006_sun2000 educ2007_sun2000 sun suntext sun2000inr

* Convert to years: each band is "lower upper years" with both bounds exclusive
gen f_educyrs90 = 20 if educ==640
foreach band in "599 640 18" "549 600 17" "539 550 16" "529 540 15" "519 530 14" ///
                "399 500 13" "329 400 12" "319 330 11" "309 320 10" "199 300 9" {
    tokenize `band'
    replace f_educyrs90 = `3' if educ > `1' & educ < `2'
}
replace f_educyrs90 = 7 if educ<200
label var f_educyrs90 "Father's years of schooling (coded) 1990+"
drop educ
gen f_educyrs90found = (f_educyrs90!=.)
label var f_educyrs90found "=1 if f_educyrs90!=."

* Income: one register file per year (ar68k ... ar99k, ar00k ... ar07k), merged on
* the father's id. Each year's cfviki becomes f_inc<year>.
forvalues yr = 1968/2007 {
    local yy = substr("`yr'", 3, 2)
    merge m:1 lopnr using ${path}ar`yy'k.dta, keepusing(lopnr cfviki) keep(match master) nogen
    rename cfviki f_inc`yr'
    * Incomes for 1978-1993 are multiplied by 100. The original note attributes this
    * to right-censoring in those years -- NOTE(review): confirm the recording unit.
    if inrange(`yr', 1978, 1993) {
        replace f_inc`yr' = f_inc`yr'*100
    }
    label var f_inc`yr' "Total income in `yr' of father"
}

* Deflating annual incomes, using CPI-numbers as scalars (CPI all items, 2005=100).
* The list runs year by year from 1960; entry k is stored as scalar cpi<1959+k>.
local cpivals ///
      9.9  10.2  10.6  10.9  11.3  11.9  12.6  13.2  13.4  13.8 /// 1960-1969
     14.8  15.9  16.8  17.9  19.7  21.6  23.9  26.6  29.3  31.4 /// 1970-1979
     35.7  40.0  43.4  47.3  51.1  54.8  57.2  59.5  63.0  67.1 /// 1980-1989
     74.0  81.0  82.9  86.9  88.7  90.9  91.4  92.0  91.8  92.2 /// 1990-1999
     93.0  95.2  97.3  99.2  99.5 100.0 101.4 103.6
local cpiyear = 1960
foreach v of local cpivals {
    scalar cpi`cpiyear' = `v'
    local ++cpiyear
}

* Real incomes: nominal income divided by that year's CPI scalar, times 100
forvalues yr = 1968/2007 {
    gen f_rinc`yr' = (f_inc`yr'/cpi`yr')*100
    label var f_rinc`yr' "Inflation-adjusted income in `yr' of father"
}
drop f_inc*

* Income by age (fathers): f_rinc<age> is the real income of the calendar year in
* which the father was that age, i.e. year - age == birth year. The age variables
* are created first, in ascending order.
forvalues age = 30/65 {
    gen f_rinc`age' = .
}
forvalues yr = 1968/2007 {
    forvalues age = 30/65 {
        replace f_rinc`age' = f_rinc`yr' if f_byear == `yr'-`age'
    }
}

* Give the father's id its own name back
rename lopnr lopnrbiofar

** 6. Mother's birth year, schooling and income

* lopnr now holds the mother's id so that register files can be merged on it
rename lopnrbiomor lopnr
sort lopnr
* datafil01 holds one record per lopnr (it is the "1" side of the 1:1/1:m merges in
* section 1), so this is a many-to-one lookup exactly as for fathers.
merge m:1 lopnr using "${path}datafil01.dta", keep(match master) keepusing(fodelsear) nogen
rename fodelsear m_byear
label var m_byear "Mother's birth year"

* 1970 census: attainment level stored as the string codes "1" to "7"
merge m:1 lopnr using ${path2}Dat/datafil10_edit.dta, keep(match master) keepusing(utbniva70) nogen
rename utbniva70 m_educ70

* Level k maps to the k-th entry of the list; any other code stays missing
gen m_educyrs70 = .
local yrs70 7 9 11 12 14 16 20
forvalues lev = 1/7 {
    local y : word `lev' of `yrs70'
    replace m_educyrs70 = `y' if m_educ70 == "`lev'"
}
label var m_educyrs70 "Mother's years of schooling (coded) 1970"
gen m_educyrs70found = (m_educyrs70!=.)
label var m_educyrs70found "=1 if m_educyrs70!=."
drop m_educ70

* 1990+ educational registers: old SUN codes for 1990-1999 ...
foreach yr in 90 93 96 99 {
    merge m:1 lopnr using "${path}datafil11_utb19`yr'", keep(match master) nogen keepusing(hutbsun)
    gen educ`yr' = real(hutbsun)
    drop hutbsun
}
* ... and SUN2000 levels for 2003-2007.
* NOTE(review): m:m is kept as in the original; it can add rows when the 2003 file
* holds several records for a mother, which relies on the later de-duplication on
* the child's lopnr in section 7.
merge m:m lopnr using "${path}datafil11_utb2003", keep(match master) nogen keepusing(sun2000niva)	//The m:m is innocuous since all duplicates in using data are fully missing and will later be dropped
gen educ2003_sun2000 = real(sun2000niva)
drop sun2000niva
foreach yr in 2006 2007 {
    merge m:1 lopnr using "${path}utbild`yr'", keep(match master) nogen keepusing(sun2000niva)
    gen educ`yr'_sun2000 = real(sun2000niva)
    drop sun2000niva
}

* Use highest value found in data (1990-1999 codes)
gen sun = educ90
foreach yr in 93 96 99 {
    replace sun = educ`yr' if sun==. | (educ`yr'>sun & educ`yr'!=.)
}

* Convert old educ codes (pre year 2000) to new SUN levels; a later SUN2000
* register value overrides the converted one (2007 wins over 2006 over 2003)
merge m:1 sun using ${path2}Dat/sun2000_converter.dta, keep(match master) nogen
foreach yr in 2003 2006 2007 {
    replace sun2000niva = educ`yr'_sun2000 if educ`yr'_sun2000!=.
}
rename sun2000niva educ

* Old codes that are still unconverted at this point are assigned by hand
local oldsun   2349 24200 33208 33899 34721 34722 34723 36298 36302 36398 ///
               36498 36680 36681 36688 36899 44899 46298 46498 66888 76298
local newlevel  204   337   327   320   336   336   336   536   537   537 ///
                547   536   536   536   530   330   536   547   537   547
local npairs : word count `oldsun'
forvalues k = 1/`npairs' {
    local from : word `k' of `oldsun'
    local to   : word `k' of `newlevel'
    replace educ = `to' if sun == `from' & educ == .
}

cap drop educ90 educ93 educ96 educ99 educ2003_sun2000 educ2006_sun2000 educ2007_sun2000 sun suntext sun2000inr

* Convert to years: each band is "lower upper years" with both bounds exclusive
gen m_educyrs90 = 20 if educ==640
foreach band in "599 640 18" "549 600 17" "539 550 16" "529 540 15" "519 530 14" ///
                "399 500 13" "329 400 12" "319 330 11" "309 320 10" "199 300 9" {
    tokenize `band'
    replace m_educyrs90 = `3' if educ > `1' & educ < `2'
}
replace m_educyrs90 = 7 if educ<200
label var m_educyrs90 "Mother's years of schooling (coded) 1990+"
drop educ
gen m_educyrs90found = (m_educyrs90!=.)
label var m_educyrs90found "=1 if m_educyrs90!=."

* Income: one register file per year (ar68k ... ar99k, ar00k ... ar07k), merged on
* the mother's id. Each year's cfviki becomes m_inc<year>.
forvalues yr = 1968/2007 {
    local yy = substr("`yr'", 3, 2)
    merge m:1 lopnr using ${path}ar`yy'k.dta, keepusing(lopnr cfviki) keep(match master) nogen
    rename cfviki m_inc`yr'
    * Incomes for 1978-1993 are multiplied by 100, as for fathers
    * NOTE(review): confirm the recording unit of cfviki in those years.
    if inrange(`yr', 1978, 1993) {
        replace m_inc`yr' = m_inc`yr'*100
    }
    label var m_inc`yr' "Total income in `yr' of mother"
}

* Real incomes: nominal income divided by that year's CPI scalar (cpi<year>, defined
* in the fathers' section), times 100
forvalues yr = 1968/2007 {
    gen m_rinc`yr' = (m_inc`yr'/cpi`yr')*100
    label var m_rinc`yr' "Inflation-adjusted income in `yr' of mother"
}
drop m_inc*

* Income by age (mothers): m_rinc<age> is the real income of the calendar year in
* which the mother was that age, i.e. year - age == birth year. The age variables
* are created first, in ascending order.
forvalues age = 30/65 {
    gen m_rinc`age' = .
}
forvalues yr = 1968/2007 {
    forvalues age = 30/65 {
        replace m_rinc`age' = m_rinc`yr' if m_byear == `yr'-`age'
    }
}

* Restore the id names: lopnr is the individual again, parents keep their own ids
rename lopnr lopnrbiomor
rename lopnrchild lopnr
order lopnr foddar gender educyrs* lopnrbiofar lopnrbiomor f_byear f_educyrs* m_byear m_educyrs* *found
sort lopnr

** 7. Incomes for children, years 1968-2007

* One row per individual is needed for the 1:1 merges below
duplicates drop lopnr , force

* One register file per year (ar68k ... ar99k, ar00k ... ar07k); each year's cfviki
* becomes inc<year>.
forvalues yr = 1968/2007 {
    local yy = substr("`yr'", 3, 2)
    merge 1:1 lopnr using ${path}ar`yy'k.dta, keepusing(lopnr cfviki) keep(match master) nogen
    rename cfviki inc`yr'
    * Incomes for 1978-1993 are multiplied by 100, as for the parents
    * NOTE(review): confirm the recording unit of cfviki in those years.
    if inrange(`yr', 1978, 1993) {
        replace inc`yr' = inc`yr'*100
    }
    label var inc`yr' "Total income in `yr'"
}

* Real incomes: nominal income divided by that year's CPI scalar, times 100
forvalues yr = 1968/2007 {
    gen rinc`yr' = (inc`yr'/cpi`yr')*100
    label var rinc`yr' "Inflation-adjusted income in `yr'"
}
drop inc*

* Income by age (offspring), ages 30-50: rinc<age> is the real income of the year in
* which the individual was that age. Created in ascending order because the
* rowmean() ranges below rely on the variables being adjacent.
forvalues age = 30/50 {
    gen rinc`age' = .
}
forvalues yr = 1968/2007 {
    forvalues age = 30/50 {
        replace rinc`age' = rinc`yr' if foddar == `yr'-`age'
    }
}

* Generate the baseline income variables used for estimation:
* mean real income over an age window and its log (own: 30-35; parents: 53-59 and 35-45)
egen lifeinc3035 = rowmean(rinc30-rinc35)
gen l_lifeinc3035 = log(lifeinc3035)

foreach window in "53 59" "35 45" {
    local lo : word 1 of `window'
    local hi : word 2 of `window'
    foreach p in f m {
        egen `p'_lifeinc`lo'`hi' = rowmean(`p'_rinc`lo'-`p'_rinc`hi')
        gen `p'_l_lifeinc`lo'`hi' = log(`p'_lifeinc`lo'`hi')
    }
}

* save main data file with all cohorts; select required cohorts later
compress
save ${path2}Dat/main_file.dta, replace

** 8. Merge additional variables for fathers and mothers

* The parents are looked up as individuals in main_file.dta (saved at the end of
* section 7), so their reform status, sample flag and municipality can be attached.
* The child's own variables are renamed first so the merged-in parent values do not
* collide with them.
* NOTE(review): firstcohort60 is not created in this file; presumably it comes from
* the reform files used in section 2 -- confirm.

* Rename child variables
ren kommun60_ kommun60_child
ren experiment experimentchild
ren lopnr lopnrchild
ren urval urvalchild
ren firstcohort60 firstcohort60child

* Fathers
* lopnr temporarily holds the father's id so that it matches the key of main_file.dta
ren lopnrbiofar lopnr
merge m:1 lopnr using ${path2}Dat/main_file.dta , keep(match master) update keepusing(experiment urval kommun60_ firstcohort60) nogen
ren lopnr lopnrbiofar
ren experiment f_experiment
* Fathers born before 1943 are coded as not exposed to the reform
replace f_experiment = 0 if f_byear<1943
ren kommun60_ f_kommun60_
ren firstcohort60 f_firstcohort60
ren urval f_urval

* Mothers
* Same procedure as for fathers, except that firstcohort60 is not retrieved
ren lopnrbiomor lopnr
merge m:1 lopnr using ${path2}Dat/main_file.dta , keep(match master) update keepusing(experiment urval kommun60_) nogen
ren lopnr lopnrbiomor
ren experiment m_experiment
* Mothers born before 1943 are coded as not exposed to the reform
replace m_experiment = 0 if m_byear<1943
ren kommun60_ m_kommun60_
ren urval m_urval

* Rename back child variables
ren lopnrchild lopnr
ren kommun60_child kommun60_
ren experimentchild experiment
ren firstcohort60child firstcohort60
ren urvalchild urval

* Tag individuals who were sampled because they were siblings of individuals in the "urval" (original) sample. 
* urval_genmax is the maximum of urval among all rows sharing the same mother id;
* the tag requires that neither the individual nor either parent has urval==1.
bysort lopnrbiomor: egen urval_genmax = max(urval)
sort lopnr
gen urvalsibling=(f_urval==0 & m_urval==0 & urval==0 & urval_genmax==1)
label var urvalsibling "=1 for individuals who were sampled as siblings of the urval sample indiv." 
drop urval_genmax 

** 9. Generate parental averages and sample dummies

* Combine education variables for parents from 1970 and 1990+:
* start from the 1970 value and switch to the 1990+ value when it is larger or when
* the 1970 value is missing.
gen f_educyrs=f_educyrs70
replace f_educyrs=f_educyrs90 if (f_educyrs70<f_educyrs90 & f_educyrs90!=.) | (f_educyrs70==. & f_educyrs90!=.)
label var f_educyrs "Maximum of father's years of schooling (coded) 1970 and 1990+"
gen m_educyrs=m_educyrs70
replace m_educyrs=m_educyrs90 if (m_educyrs70<m_educyrs90 & m_educyrs90!=.) | (m_educyrs70==. & m_educyrs90!=.)
label var m_educyrs "Maximum of mother's years of schooling (coded) 1970 and 1990+"

* Parental maximum education
egen p_educyrs70=rowmax(f_educyrs70 m_educyrs70)
label var p_educyrs70 "Maximum of father's and mother's years of schooling (coded) 1970"
egen p_educyrs90=rowmax(f_educyrs90 m_educyrs90)
label var p_educyrs90 "Maximum of father's and mother's years of schooling (coded) 1990+"
egen p_educyrs  =rowmax(f_educyrs m_educyrs)
label var p_educyrs "Maximum of father's and mother's years of schooling (coded) combined 1970/90+"

* Parental maximum income (computed on the log income measures)
egen p_l_lifeinc5359=rowmax(f_l_lifeinc5359 m_l_lifeinc5359)
label var p_l_lifeinc5359 "Maximum of father's and mother's income ages 53 to 59"
egen p_l_lifeinc3545=rowmax(f_l_lifeinc3545 m_l_lifeinc3545)
label var p_l_lifeinc3545 "Maximum of father's and mother's income ages 35 to 45"

* Birth year, reform status and municipality of the "relevant" parent, under three
* selection criteria:
*   ed   - parent with more years of schooling (combined x_educyrs)
*   inc1 - parent with higher l_lifeinc5359
*   inc2 - parent with higher l_lifeinc3545
* Selection rule, identical for every criterion and variable:
*   both parents observed -> father if father's value >= mother's, else mother
*   only one observed     -> that parent
*   neither observed      -> missing
* All missing checks compare against system missing (.).
foreach crit in ed inc1 inc2 {
    if "`crit'" == "ed" {
        local cmp  educyrs
        local what "highest years of schooling in combined x_educyrs variable"
    }
    else if "`crit'" == "inc1" {
        local cmp  l_lifeinc5359
        local what "highest income in l_lifeinc5359"
    }
    else {
        local cmp  l_lifeinc3545
        local what "highest income in l_lifeinc3545"
    }
    local fval f_`cmp'
    local mval m_`cmp'

    foreach v in byear experiment kommun60 {
        * the parents' municipality variables carry a trailing underscore
        local src = cond("`v'" == "kommun60", "kommun60_", "`v'")
        gen     p_`v'_`crit' = f_`src' if `fval'>=`mval' & `fval'!=. & `mval'!=.
        replace p_`v'_`crit' = m_`src' if `fval'< `mval' & `fval'!=. & `mval'!=.
        replace p_`v'_`crit' = f_`src' if `fval'!=. & `mval'==.
        replace p_`v'_`crit' = m_`src' if `fval'==. & `mval'!=.
    }
    label var p_byear_`crit' "Birth year of parent who has `what'"
    label var p_experiment_`crit' "=1 if parent who has `what' was subject to reform"
    label var p_kommun60_`crit' "Municipality of parent who has `what'"
}

* Parental age at birth (child's birth year minus parent's birth year)
gen f_ageatbirth = foddar - f_byear
label var f_ageatbirth "Father's age at birth"
gen m_ageatbirth = foddar - m_byear
label var m_ageatbirth "Mother's age at birth"
gen p_ageatbirth_ed = foddar - p_byear_ed
label var p_ageatbirth_ed "Parent's age at birth, education"
gen p_ageatbirth_inc1 = foddar - p_byear_inc1
label var p_ageatbirth_inc1 "Parent's age at birth, income 1st gen"
gen p_ageatbirth_inc2 = foddar - p_byear_inc2
label var p_ageatbirth_inc2 "Parent's age at birth, income 2nd gen"

* Sample dummies: equal to 1 when the condition holds and missing (not 0) otherwise
gen byte sample_1=1 if foddar!=. & f_byear!=.
gen byte sample_2=1 if foddar!=. & f_byear!=. & f_ageatbirth<33
gen byte sample_3_ed=1 if foddar!=. & p_byear_ed!=.
gen byte sample_3_inc1=1 if foddar!=. & p_byear_inc1!=.
gen byte sample_3_inc2=1 if foddar!=. & p_byear_inc2!=.
gen byte sample_4_ed=1 if foddar!=. & p_byear_ed!=. & p_ageatbirth_ed<33
gen byte sample_4_inc1=1 if foddar!=. & p_byear_inc1!=. & p_ageatbirth_inc1<33
gen byte sample_4_inc2=1 if foddar!=. & p_byear_inc2!=. & p_ageatbirth_inc2<33

label var sample_1 "Var=1 if using father's education, no age restrictions"
label var sample_2 "Var=1 if using father's education, father's age at birth max 32"
label var sample_3_ed "Var=1 if using max of parents' education, no age restrictions"
label var sample_3_inc1 "Var=1 if using max of parents' income, 1st gen, no age restrictions"
label var sample_3_inc2 "Var=1 if using max of parents' income, 2nd gen, no age restrictions"
label var sample_4_ed "Var=1 if using max of parents' education, parent's age at birth max 32"
label var sample_4_inc1 "Var=1 if using max of parents' income, 1st gen, parent's age at birth max 32"
label var sample_4_inc2 "Var=1 if using max of parents' income, 2nd gen, parent's age at birth max 32"

** 10. Store main files

* Version including all income variables
compress
sort lopnr
save ${path2}Dat/main_file_income.dta, replace

* Version without the annual and by-age real incomes of child, father and mother;
* this overwrites the intermediate main_file.dta saved at the end of section 7
drop rinc* f_rinc* m_rinc*

compress
save ${path2}Dat/main_file.dta, replace

* Remove the temporary files written in sections 3 and 4
foreach tmp in temp temp2 {
    erase ${path2}Dat/`tmp'.dta
}
